# Attach the packages this analysis relies on.
library(tidyverse)
library(dplyr, warn.conflicts = FALSE)
library(here)
library(plotly)
# NOTE(review): no highcharter function is called in the visible part of this
# file - confirm whether this package is still needed.
library(highcharter)
# Make the black-and-white theme the default for every ggplot2 plot below.
theme_set(theme_bw())

Data Overview

# Read the track table from data/jackson.csv (resolved from the project root
# by here::here()). Any column that is not listed explicitly is parsed as a
# double through `.default`.
data <- read_csv(
  here::here("data/jackson.csv"),
  col_types = cols(
    .default = col_double(),
    # Text columns: identifiers, names, URLs and musical key labels.
    album_uri = col_character(),
    album_name = col_character(),
    album_img = col_character(),
    album_release_date = col_character(),
    track_name = col_character(),
    track_uri = col_character(),
    key = col_character(),
    mode = col_character(),
    key_mode = col_character(),
    # Parsed as a date using readr's default date format.
    album_release_year = col_date(format = ""),
    # Whole-number columns.
    album_popularity = col_integer(),
    time_signature = col_integer(),
    track_popularity = col_integer()
  )
)

# Print one line per column with its type and first values.
glimpse(data)
Observations: 500
Variables: 23
$ album_uri          <chr> "5T9tTjPIfjbUJGRJdYOOLl", "5T9tTjPIfjbUJGRJdYOOLl", "5T9tTjPIfjbU...
$ album_name         <chr> "Jackson Do Pandeiro Volume 1: Tum, Tum, Tum!", "Jackson Do Pande...
$ album_img          <chr> "https://i.scdn.co/image/5dcc4a0cad740f1ee0774196d0a14f3693ef8879...
$ album_release_date <chr> "1958-11-11", "1958-11-11", "1958-11-11", "1958-11-11", "1958-11-...
$ album_release_year <date> 1958-11-11, 1958-11-11, 1958-11-11, 1958-11-11, 1958-11-11, 1958...
$ album_popularity   <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
$ track_name         <chr> "Tum, Tum, Tum", "Pacífico Pacato", "Nortista Quatrocentão", "Sem...
$ track_uri          <chr> "6cCYhV6fU68uzbjWPG9V7x", "6Gu7y9SgtVTGh8YGhDPtCe", "1hq7M7cJtvDg...
$ danceability       <dbl> 0.501, 0.663, 0.550, 0.447, 0.544, 0.571, 0.495, 0.572, 0.500, 0....
$ energy             <dbl> 0.987, 0.962, 0.947, 0.969, 0.972, 0.926, 0.967, 0.986, 0.947, 0....
$ key                <chr> "A", "F", "D", "G", "E", "F", "E", "C", "F", "A#", "E", "F", "D#"...
$ loudness           <dbl> 2.561, 1.137, 1.621, 2.743, 2.513, 2.414, 2.375, 2.597, 3.078, 3....
$ mode               <chr> "major", "major", "major", "major", "minor", "major", "minor", "m...
$ speechiness        <dbl> 0.0429, 0.1810, 0.0469, 0.0549, 0.0502, 0.0344, 0.0576, 0.0367, 0...
$ acousticness       <dbl> 0.718, 0.738, 0.666, 0.759, 0.787, 0.651, 0.712, 0.194, 0.286, 0....
$ instrumentalness   <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
$ liveness           <dbl> 0.282, 0.200, 0.251, 0.333, 0.176, 0.342, 0.321, 0.301, 0.323, 0....
$ valence            <dbl> 0.963, 0.961, 0.923, 0.899, 0.783, 0.961, 0.755, 0.989, 0.957, 0....
$ tempo              <dbl> 101.676, 113.562, 116.125, 116.023, 112.863, 133.065, 117.822, 10...
$ duration_ms        <dbl> 158133, 139773, 163173, 143733, 151653, 157480, 158133, 154680, 1...
$ time_signature     <int> 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,...
$ key_mode           <chr> "A major", "F major", "D major", "G major", "E minor", "F major",...
$ track_popularity   <int> 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,...

Danceability

# Q-Q plot of danceability: sample quantiles against the theoretical
# quantiles of stat_qq()'s default distribution.
ggplot(data, aes(sample = danceability)) +
  stat_qq()

# Histogram of danceability on a density scale, using 0.05-wide bins and an
# x-axis tick every 0.05 between 0 and 1.
ggplot(data, aes(x = danceability, y = ..density..)) +
  geom_histogram(binwidth = 0.05, fill = "grey", color = "black") +
  scale_x_continuous(breaks = seq(from = 0, to = 1, by = 0.05))

Speechiness

# Q-Q plot of speechiness: sample quantiles against the theoretical
# quantiles of stat_qq()'s default distribution.
ggplot(data, aes(sample = speechiness)) +
  stat_qq()

# Histogram of speechiness on a density scale, using 0.1-wide bins. The axis
# ticks start at -0.05 and advance by 0.1, so they are offset by half a bin
# width from the multiples of 0.1.
ggplot(data, aes(x = speechiness, y = ..density..)) +
  geom_histogram(binwidth = 0.1, fill = "grey", color = "black") +
  scale_x_continuous(breaks = seq(from = -0.05, to = 1, by = 0.1))

Duration

# Add the track duration in seconds, derived from the millisecond column.
data <- mutate(data, duration_s = duration_ms / 1000)

# Show only the new column to confirm the conversion.
glimpse(select(data, duration_s))
Observations: 500
Variables: 1
$ duration_s <dbl> 158.133, 139.773, 163.173, 143.733, 151.653, 157.480, 158.133, 154.680, 1...
# Q-Q plot of track duration in seconds: sample quantiles against the
# theoretical quantiles of stat_qq()'s default distribution.
ggplot(data, aes(sample = duration_s)) +
  stat_qq()

# Histogram of track duration (seconds) on a density scale, using 15-second
# bins. Axis ticks are placed every 10 seconds from 0 up to 300.
ggplot(data, aes(x = duration_s, y = ..density..)) +
  geom_histogram(binwidth = 15, fill = "grey", color = "black") +
  scale_x_continuous(breaks = seq(from = 0, to = 300, by = 10))

Mais discurso/diálogo (speechiness) tem efeito sobre a dançabilidade da música?

# Scatter plot of danceability against speechiness, one point per track name.
#
# The previous group_by(track_name) %>% unique() did not deduplicate tracks:
# unique() on a data frame only drops rows that are identical in every
# column, so any track listed under more than one album was still drawn once
# per listing. distinct() keeps the first row found for each track name.
p <- data %>%
  distinct(track_name, .keep_all = TRUE) %>%
  ggplot(aes(x = speechiness, y = danceability)) +
  geom_point(alpha = 0.4)

# Turn the static plot into an interactive plotly widget.
ggplotly(p)
# Filled 2D density contours of danceability against speechiness, computed
# from one row per track name.
#
# The previous group_by(track_name) %>% unique() did not deduplicate tracks:
# unique() on a data frame only drops rows that are identical in every
# column, so a track listed under more than one album contributed to the
# density once per listing. distinct() keeps the first row found for each
# track name.
data %>%
  distinct(track_name, .keep_all = TRUE) %>%
  ggplot(aes(speechiness, danceability)) +
  stat_density2d(aes(fill = ..level..), geom = "polygon")

# The rows with the ten highest speechiness values, highest first.
#
# top_n() used to run after group_by(track_name), which ranks within each
# track name and therefore returned (almost) every row, still grouped.
# Ranking over the whole table gives the intended overall top ten. top_n()
# keeps ties, so more than ten rows can be returned.
data %>%
  top_n(10, speechiness) %>%
  arrange(desc(speechiness))

Como o passar dos anos afeta o tempo de duração da música?

Último álbum: “1981: Isso é que é Forró!”

# Flag rows whose album was released after 1981 as `remaster`.
#
# album_release_date is read as character, so this is a string comparison
# that relies on the year-month-day ordering of the values. The previous
# cutoff literal "1981-30-12" had day and month swapped; it only produced
# the intended split because "3" sorts after every month digit. A valid
# ISO date for the last day of 1981 states the cutoff explicitly.
data <- data %>%
  mutate(remaster = album_release_date > "1981-12-31")
# Track duration (seconds) per album release, coloured by the remaster flag:
# semi-transparent points with a box plot drawn over them, one box per
# release value. X-axis labels are rotated 30 degrees to keep them readable.
# NOTE(review): geom_jitter() is given position_dodge() as its position, so
# the points are presumably dodged rather than randomly jittered - confirm
# that this is intended.
ggplot(
  data,
  aes(
    x = as.factor(album_release_year),
    y = duration_s,
    group = album_release_year,
    color = remaster
  )
) +
  geom_jitter(position = position_dodge(width = 0.9), alpha = 0.3) +
  geom_boxplot(outlier.colour = NA, position = "dodge", alpha = 0.6) +
  theme(axis.text.x = element_text(angle = 30, hjust = 1))

Os álbuns lançados mais recentemente (relançamentos/remasterizados) são mais populares?

# Lollipop chart of album popularity: one grey segment from 0 to the album's
# popularity plus a point at the end, with albums ordered by popularity and
# points coloured by the remaster flag.

# Wide left margin (in pixels) for the album names on the y axis.
m <- list(l = 505)

p <- data %>%
  ggplot(aes(album_popularity,
             y = reorder(album_name, album_popularity),
             color = remaster)) +
  geom_segment(aes(x = 0,
                   y = reorder(album_name, album_popularity),
                   xend = album_popularity,
                   yend = album_name),
               color = "grey50") +
  geom_point() +
  theme(axis.title.y = element_blank())

# The figure size is passed to ggplotly() because plotly deprecated setting
# width/height in layout(). FALSE is spelled out since `F` can be reassigned.
ggplotly(p, width = 800, height = 600) %>%
  layout(autosize = FALSE,
         margin = m)
Specifying width/height in layout() is now deprecated.
Please specify in ggplotly() or plot_ly()
LS0tCnRpdGxlOiAiRURBIFZJUyBjYW7Dp8O1ZXMiCnN1YnRpdGxlOiAnQW7DoWxpc2Ugc29icmUgSmFja3NvbiBkbyBQYW5kZWlybycKYXV0aG9yOiAiSm9zw6kgQmVuYXJkaSBkZSBTb3V6YSBOdW5lcyIKb3V0cHV0OgogIGh0bWxfbm90ZWJvb2s6CiAgICB0b2M6IHllcwogICAgdG9jX2Zsb2F0OiB5ZXMKICBodG1sX2RvY3VtZW50OgogICAgZGZfcHJpbnQ6IHBhZ2VkCiAgICB0b2M6IHllcwogICAgdG9jX2Zsb2F0OiB5ZXMKLS0tCgpgYGB7cn0KbGlicmFyeSh0aWR5dmVyc2UpCmxpYnJhcnkoZHBseXIsIHdhcm4uY29uZmxpY3RzID0gRkFMU0UpCmxpYnJhcnkoaGVyZSkKbGlicmFyeShwbG90bHkpCmxpYnJhcnkoaGlnaGNoYXJ0ZXIpCnRoZW1lX3NldCh0aGVtZV9idygpKQpgYGAKCiMjIERhdGEgT3ZlcnZpZXcKCmBgYHtyfQpkYXRhIDwtIHJlYWRfY3N2KGhlcmU6OmhlcmUoImRhdGEvamFja3Nvbi5jc3YiKSwgCiAgICAgICAgICAgICAgICAgICAgICAgICAgICBjb2xfdHlwZXMgPSBjb2xzKAogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIC5kZWZhdWx0ID0gY29sX2RvdWJsZSgpLAogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIGFsYnVtX3VyaSA9IGNvbF9jaGFyYWN0ZXIoKSwKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICBhbGJ1bV9uYW1lID0gY29sX2NoYXJhY3RlcigpLAogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIGFsYnVtX2ltZyA9IGNvbF9jaGFyYWN0ZXIoKSwKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICBhbGJ1bV9yZWxlYXNlX2RhdGUgPSBjb2xfY2hhcmFjdGVyKCksCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgYWxidW1fcmVsZWFzZV95ZWFyID0gY29sX2RhdGUoZm9ybWF0ID0gIiIpLAogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIGFsYnVtX3BvcHVsYXJpdHkgPSBjb2xfaW50ZWdlcigpLAogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIHRyYWNrX25hbWUgPSBjb2xfY2hhcmFjdGVyKCksCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgdHJhY2tfdXJpID0gY29sX2NoYXJhY3RlcigpLAogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIGtleSA9IGNvbF9jaGFyYWN0ZXIoKSwKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICBtb2RlID0gY29sX2NoYXJhY3RlcigpLAogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIHRpbWVfc2lnbmF0dXJlID0gY29sX2ludGVnZXIoKSwKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICBrZXlfbW9kZSA9IGNvbF9jaGFyYWN0ZXIoKSwKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICB0cmFja19wb3B1bGFyaXR5ID0gY29sX2ludGVnZXIoKQogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICkpCmRhdGEgJT4lIAogICAgZ2xpbXBzZSgpCmBgYAoKIyMjIERhbmNlYWJpbGl0eQoKYGBge3J9CmRhdGEgJT4lIAogICAgZ2dwbG90KGFl
cyhzYW1wbGU9ZGFuY2VhYmlsaXR5KSkgKyAKICAgICAgICBzdGF0X3FxKCkKYGBgCgpgYGB7cn0KZGF0YSAlPiUgCiAgICBnZ3Bsb3QoYWVzKGRhbmNlYWJpbGl0eSwgLi5kZW5zaXR5Li4pKSArCiAgICBnZW9tX2hpc3RvZ3JhbShiaW53aWR0aCA9IDAuMDUsCiAgICAgICAgICAgICAgICAgICBmaWxsPSJncmV5IiwKICAgICAgICAgICAgICAgICAgIGNvbG9yPSJibGFjayIpICsKICAgIHNjYWxlX3hfY29udGludW91cyhicmVha3M9c2VxKDAsMSwwLjA1KSkgCmBgYAoKIyMjIFNwZWVjaGluZXMKCmBgYHtyfQpkYXRhICU+JSAKICAgIGdncGxvdChhZXMoc2FtcGxlPXNwZWVjaGluZXNzKSkgKyAKICAgICAgICBzdGF0X3FxKCkKYGBgCgpgYGB7cn0KZGF0YSAlPiUgCiAgICBnZ3Bsb3QoYWVzKHNwZWVjaGluZXNzLCAuLmRlbnNpdHkuLikpICsKICAgIGdlb21faGlzdG9ncmFtKGJpbndpZHRoID0gMC4xLAogICAgICAgICAgICAgICAgICAgZmlsbD0iZ3JleSIsCiAgICAgICAgICAgICAgICAgICBjb2xvcj0iYmxhY2siKSArCiAgICBzY2FsZV94X2NvbnRpbnVvdXMoYnJlYWtzPXNlcSgtMC4wNSwxLDAuMSkpIAoKYGBgCgojIyMgRHVyYXRpb24KCmBgYHtyfQpkYXRhIDwtIGRhdGEgJT4lCiAgICBtdXRhdGUoZHVyYXRpb25fcyA9IGR1cmF0aW9uX21zLzEwMDApCgpkYXRhICU+JSAKICAgIHNlbGVjdChkdXJhdGlvbl9zKSAlPiUKICAgIGdsaW1wc2UoKQpgYGAKCgpgYGB7cn0KZGF0YSAlPiUgCiAgICBnZ3Bsb3QoYWVzKHNhbXBsZT1kdXJhdGlvbl9zKSkgKyAKICAgICAgICBzdGF0X3FxKCkKYGBgCgpgYGB7cn0KZGF0YSAlPiUgCiAgICBnZ3Bsb3QoYWVzKGR1cmF0aW9uX3MsIC4uZGVuc2l0eS4uKSkgKwogICAgZ2VvbV9oaXN0b2dyYW0oYmlud2lkdGggPSAxNSwKICAgICAgICAgICAgICAgICAgIGZpbGw9ImdyZXkiLAogICAgICAgICAgICAgICAgICAgY29sb3I9ImJsYWNrIikgKwogICAgc2NhbGVfeF9jb250aW51b3VzKGJyZWFrcz1zZXEoMCwzMDAsMTApKSAKYGBgCgojIyBNYWlzIGRpc2N1cnNvL2Rpw6Fsb2dvIChzcGVlY2hpbmVzKSB0ZW0gZWZlaXRvIHNvYnJlIGEgZGFuw6dhYmlsaWRhZGUgZGEgbcO6c2ljYT8KCmBgYHtyfQpwIDwtIGRhdGEgJT4lIAogICAgICAgIGdyb3VwX2J5KHRyYWNrX25hbWUpICU+JQogICAgICAgIHVuaXF1ZSgpICU+JQogICAgICAgIGdncGxvdChhZXMoeD1zcGVlY2hpbmVzcywKICAgICAgICAgICAgICAgICAgIHk9ZGFuY2VhYmlsaXR5KSkgKwogICAgICAgIGdlb21fcG9pbnQoYWxwaGE9MC40KSAKCmdncGxvdGx5KHApCmBgYAoKYGBge3J9CmRhdGEgJT4lIAogICAgZ3JvdXBfYnkodHJhY2tfbmFtZSkgJT4lCiAgICB1bmlxdWUoKSAlPiUKICAgIGdncGxvdChhZXMoc3BlZWNoaW5lc3MsZGFuY2VhYmlsaXR5KSkgKwogICAgc3RhdF9kZW5zaXR5MmQoYWVzKGZpbGwgPSAuLmxldmVsLi4pLCBnZW9tID0gInBvbHlnb24iKQpgYGAKCmBgYHtyfQpkYXRhICU+JQogICAgZ3JvdXBfYnkodHJhY2tf
bmFtZSkgJT4lCiAgICB0b3BfbigxMCwgc3BlZWNoaW5lc3MpCmBgYAoKIyMgQ29tbyBvIHBhc3NhciBkb3MgYW5vcyBhZmV0YSBvIHRlbXBvIGRlIGR1cmHDp8OjbyBkYSBtw7pzaWNhPwoKVWx0aW1vIGFsYnVtICIxOTgxOiBJc3NvIMOpIHF1ZSDDqSBGb3Jyw7MhIgoKCmBgYHtyfQpkYXRhIDwtIGRhdGEgJT4lCiAgICBtdXRhdGUocmVtYXN0ZXIgPSBhbGJ1bV9yZWxlYXNlX2RhdGUgPiAiMTk4MS0zMC0xMiIpCmBgYAoKYGBge3J9CmRhdGEgJT4lIAogICAgZ2dwbG90KGFlcyh4PWFzLmZhY3RvcihhbGJ1bV9yZWxlYXNlX3llYXIpLAogICAgICAgICAgICAgICBkdXJhdGlvbl9zLAogICAgICAgICAgICAgICBncm91cD1hbGJ1bV9yZWxlYXNlX3llYXIsCiAgICAgICAgICAgICAgIGNvbG9yPXJlbWFzdGVyKSkgKwogICAgZ2VvbV9qaXR0ZXIocG9zaXRpb24gPSBwb3NpdGlvbl9kb2RnZSh3aWR0aCA9IDAuOSksCiAgICAgICAgICAgICAgICBhbHBoYT0wLjMpICsKICAgIGdlb21fYm94cGxvdChvdXRsaWVyLmNvbG91ciA9IE5BLHBvc2l0aW9uID0gImRvZGdlIiwgYWxwaGE9MC42KSArCiAgICB0aGVtZShheGlzLnRleHQueCA9IGVsZW1lbnRfdGV4dChhbmdsZSA9IDMwLCBoanVzdCA9IDEpKQoKYGBgCgojIyMjIE9zIGFsYnVucyBsYW7Dp2Fkb3MgbWFpcyByZWNlbnRlbWVudGUgKHJlbGFuw6dhbWVudG9zL3JlbWFzdGVyaXphZG9zKSBzw6NvIG1haXMgcG9wdWxhcmVzPyAKCmBgYHtyfQojIGxvbGxpcG9wIGNoYXJ0Cm0gPC0gbGlzdCgKICBsID0gNTA1KQoKcCA8LSBkYXRhICU+JQogICAgICAgIGdncGxvdChhZXMoYWxidW1fcG9wdWxhcml0eSx5PXJlb3JkZXIoYWxidW1fbmFtZSxhbGJ1bV9wb3B1bGFyaXR5KSxjb2xvcj1yZW1hc3RlcikpICsKICAgICAgICAgICAgZ2VvbV9zZWdtZW50KGFlcyh4ID0gMCwgeSA9IHJlb3JkZXIoYWxidW1fbmFtZSxhbGJ1bV9wb3B1bGFyaXR5KSwgCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgeGVuZCA9IGFsYnVtX3BvcHVsYXJpdHksIAogICAgICAgICAgICAgICAgICAgICAgICAgICAgIHllbmQgPSBhbGJ1bV9uYW1lKSwKICAgICAgICAgICAgICAgICAgICAgICAgIGNvbG9yID0gImdyZXk1MCIpICsgCiAgICAgICAgZ2VvbV9wb2ludCgpICsKICAgICAgICB0aGVtZShheGlzLnRpdGxlLnk9ZWxlbWVudF9ibGFuaygpKQoKZ2dwbG90bHkocCkgJT4lCiAgbGF5b3V0KGF1dG9zaXplID0gRiwKICAgICAgICAgbWFyZ2luID0gbSwKICAgICAgICAgaGVpZ2h0PTYwMCwKICAgICAgICAgd2lkdGg9ODAwKQpgYGAKCg==